#!/home/chunwei/chunenv/bin/python
# -*- coding: utf-8 -*-
import sys
import pandas as pd
import numpy as np
import datetime as d
import argparse
'''
Merge three kinds of features (enrollment-, user- and course-level)
onto the base enrollment table.
'''
def parse_args():
    """Parse the command-line arguments of the feature-merging script.

    Five positional arguments are expected, in this order:
    ``inital_enrollment``, ``enroll_feas``, ``user_feas``, ``course_feas``
    and ``output``.  When the script is started without any argument the
    help text is printed and the process exits, as if ``-h`` had been given.

    Returns:
        dict: maps each argument name to the raw string from the command
        line (no splitting or path handling is done here).
    """
    parser = argparse.ArgumentParser(
        description='Merge enrollment, user and course features onto the '
                    'base enrollment table.')
    parser.add_argument(
        'inital_enrollment',
        help='CSV file holding the base enrollment table')
    parser.add_argument(
        'enroll_feas',
        help='whitespace-separated CSV paths of enrollment features, '
             'passed as one quoted argument (may be empty)')
    parser.add_argument(
        'user_feas',
        help='whitespace-separated CSV paths of user features, '
             'passed as one quoted argument (may be empty)')
    parser.add_argument(
        'course_feas',
        help='whitespace-separated CSV paths of course features, '
             'passed as one quoted argument (may be empty)')
    parser.add_argument(
        'output',
        help='path of the merged CSV file to write')

    # No arguments at all -> show the help.  The fallback is a local list so
    # the process-wide sys.argv is left untouched.
    argv = sys.argv[1:] or ['-h']
    return vars(parser.parse_args(argv))

# Resolve the command line once.  Each *_feas argument is a single string
# that may hold several whitespace-separated CSV paths, hence the split();
# an empty string yields an empty list.
args = parse_args()
inital_enrollment_path, output = args['inital_enrollment'], args['output']
enroll_feas_paths, user_fea_paths, course_fea_paths = (
    args[name].split()
    for name in ('enroll_feas', 'user_feas', 'course_feas')
)

def merge_sub_features(paths, id):
    """Outer-join several feature CSV files on a shared key column.

    Every entry of ``paths`` is loaded with ``pd.read_csv`` and indexed by
    the column named ``id``.  The resulting frames are combined left to
    right with an outer join on that index, so a key present in any file
    appears in the result.

    Args:
        paths: iterable of CSV locations accepted by ``pd.read_csv``.
        id: name of the key column present in every file.

    Returns:
        The merged DataFrame indexed by ``id``, or ``None`` when ``paths``
        is empty.
    """
    frames = [pd.read_csv(csv_path).set_index(id) for csv_path in paths]
    if not frames:
        return None

    combined = frames[0]
    for frame in frames[1:]:
        combined = combined.merge(
            frame, left_index=True, right_index=True, how='outer')
    return combined

# Build one merged table per feature family.  A family whose path list is
# empty is skipped, which leaves its *_fea name undefined; the merge step
# further down is guarded by the same lists.
#
# print(x) with a single argument produces the same output as the Python 2
# statement `print x` and is also valid Python 3.
if user_fea_paths:
    print(user_fea_paths)
    user_fea = merge_sub_features(user_fea_paths, 'username')
if course_fea_paths:
    print(course_fea_paths)
    course_fea = merge_sub_features(course_fea_paths, 'course_id')
if enroll_feas_paths:
    print(enroll_feas_paths)
    enroll_fea = merge_sub_features(enroll_feas_paths, 'enrollment_id')


# Base table that the feature families are joined onto.
inital_enrollment = pd.read_csv(inital_enrollment_path)


def merge_field(fea, prefix, id):
    """Left-join a feature table onto the module-level ``inital_enrollment``.

    Every column of ``fea`` is renamed to ``<prefix>-<column>`` before the
    join.  Rows of ``inital_enrollment`` are matched through their ``id``
    column against the index of ``fea``; rows without a match keep NaN in
    the feature columns.

    Args:
        fea: feature DataFrame indexed by the join key.
        prefix: string prepended (with a ``-`` separator) to each column name.
        id: name of the key column in ``inital_enrollment``.

    Returns:
        A new DataFrame: ``inital_enrollment`` plus the prefixed features.
    """
    # rename() returns a new frame, so the caller's `fea` keeps its original
    # column names and calling this twice cannot double the prefix.
    prefixed = fea.rename(columns=lambda field: prefix + '-' + field)
    return pd.merge(inital_enrollment, prefixed,
                    left_on=id, right_index=True, how='left')


# Join each available feature family onto the base table.  The guards
# mirror the ones used when the *_fea tables were built, so a skipped
# family is never referenced.
if enroll_feas_paths:
    inital_enrollment = merge_field(enroll_fea, 'enroll', 'enrollment_id')
if user_fea_paths:
    inital_enrollment = merge_field(user_fea, 'user', 'username')
if course_fea_paths:
    inital_enrollment = merge_field(course_fea, 'course', 'course_id')

# Single-argument print() calls behave the same on Python 2 and Python 3.
# The format string reproduces the separator space that the old statement
# `print 'output to ', output` inserted between its two items.
print('%s %s' % ('output to ', output))
print(inital_enrollment.head())
inital_enrollment.to_csv(output, index=False)
